library(tidyverse)
library(ggplot2)
library(cregg)
library(cjoint)
library(readstata13)
library(data.table)
library(qwraps2)
library(survey)
library(janitor)
#plot theme
plot_theme =  theme_bw()+ theme(text = element_text(size=10), 
                                panel.grid.major = element_blank(),
                                panel.grid.minor = element_blank(),
                                panel.grid.major.y = element_line(colour = "#CCCCCC"),
                                axis.line = element_line(colour = "black"),
                                strip.background=element_rect(colour=NA, fill="#DDDDDD"),
                                legend.title=element_blank(), 
                                legend.position="top",
                                axis.title=element_text(size=14,face="bold"), 
                                axis.text=element_text(size=12),
                                strip.text.x = element_text(size = 12),
                                legend.text=element_text(size=12), 
                                axis.text.x = element_text(angle = 45, hjust = 1, vjust=1)) 
#plot features
pd<-position_dodge(.2)
pd_text<-position_dodge(1)

#number of runs for boot strap 
runs = 3000


### Table A1
### Bovitz sample 
bovitz_demos_df <- read.dta13("../final_w1w2.dta")


#pid
bovitz_demos_df <- bovitz_demos_df %>% mutate(party_id = case_when( 
  q2_w1 == 1  ~ "Democratic", 
  q2_w1 == 2 ~ "Republican",
  q2_w1 == 3 ~ "Independent",
  q2_w1 == 4 ~ "Other",
  q2_w1 == 5 ~ "Don't know",
  TRUE ~ NA_character_
)) 


#income
bovitz_demos_df <- bovitz_demos_df %>% mutate(income =  case_when( 
  inc <= 2  ~ "Less than $30,000", 
  inc > 2 & inc < 6 ~ "Between $30,000 and $59,000",
  inc >= 6 & inc < 10 ~ "Between $60,000 and $120,000",
  inc >= 10 ~ "More than $120,000",
  inc == 5 ~ "Don't know",
  TRUE ~ NA_character_), income = factor(income, levels=c("Less than $30,000" , "Between $30,000 and $59,000" , "Between $60,000 and $120,000" , "More than $120,000")))

#gender
bovitz_demos_df <- bovitz_demos_df %>% mutate(gender_demo =  case_when( 
  gender == 1  ~ "Male", 
  gender == 2  ~ "Female",
  gender == 3 ~ "Other",
  is.na(gender) ~ "Other",
  TRUE ~ NA_character_), gender_demo = factor(gender_demo, levels=c("Male" , "Female" , "Other")))



#edu
bovitz_demos_df <- bovitz_demos_df %>% mutate(edu =  case_when( 
  q22_w1 == 1 ~  "Less than high school", 
  q22_w1 == 2 ~  "High school graduate", 
  q22_w1 == 3 ~  "Some college", 
  q22_w1 == 4 ~  "2 year degree", 
  q22_w1 == 5 ~  "4 year degree", 
  q22_w1 == 6 ~  "Post-grad", 
  q22_w1 == 7 ~  "Post-grad", 
  TRUE ~ NA_character_), edu = factor(edu, levels=c(
    "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))


#age 
bovitz_demos_df <- bovitz_demos_df %>% mutate(age = age+17, age_range = case_when(
  age >= 18 & age <= 29 ~ "18--29",
  age >= 30 & age <= 39 ~ "30--39",
  age >= 40 & age <= 49 ~ "40--49",
  age >= 50 & age <= 59 ~ "50--59",
  age >= 60 & age <= 69 ~ "60--69",
  age >= 70 ~  "70+", 
  TRUE ~ NA_character_), age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))



#get demos for unique individuals in the main analysis
bovitz_demo <- bovitz_demos_df %>% select(edu,  income, gender_demo, age_range, party_id, wave1_only, task_num, profile_chosen)  %>% filter(is.na(wave1_only) & task_num == 1 & profile_chosen == 1 )


options(qwraps2_markup="markdown")
bovitz_demos_summary<- list("Gender" =  
                       list("Male"=  ~ n_perc(gender_demo == "Male", na_rm=TRUE),
                            "Female" = ~ n_perc(gender_demo == "Female", na_rm=TRUE),
                            "Other" = ~ n_perc(gender_demo == "Other", na_rm=TRUE)), 
                     "Income" = 
                       list("Less than $30,000"=  ~ n_perc(income == "Less than $30,000", na_rm=TRUE),
                            "Between $30,000 and $59,000" = ~ n_perc(income == "Between $30,000 and $59,000", na_rm=TRUE),
                            "Between $60,000 and $120,000" = ~ n_perc(income == "Between $60,000 and $120,000", na_rm=TRUE),
                            "More than $120,000" = ~ n_perc(income == "More than $120,000", na_rm=TRUE)),
                     "Age" = 
                       list("18--29" =  ~ n_perc(age_range == "18--29", na_rm=TRUE), 
                            "30--39" =  ~ n_perc(age_range == "30--39", na_rm=TRUE), 
                            "40--49" =  ~ n_perc(age_range == "40--49", na_rm=TRUE), 
                            "50--59" =  ~ n_perc(age_range == "50--59", na_rm=TRUE), 
                            "60--69" =  ~ n_perc(age_range == "60--69", na_rm=TRUE), 
                            "70+"    =  ~ n_perc(age_range == "70+", na_rm=TRUE)), 
                     "Party ID" = 
                       list("Democratic"=  ~ n_perc(party_id == "Democratic", na_rm=TRUE),
                            "Republican"=  ~ n_perc(party_id == "Republican", na_rm=TRUE),
                            "Independent" =  ~ n_perc(party_id == "Independent", na_rm=TRUE),
                            "Other"=  ~ n_perc(party_id == "Other" | party_id == "Don't know" , na_rm=TRUE)),
                     "Education" =
                       list("Less than high school"=  ~ n_perc(edu == "Less than high school", na_rm=TRUE),
                            "High school graduate"=  ~ n_perc(edu == "High school graduate", na_rm=TRUE),
                            "Some college" =  ~ n_perc(edu == "Some college", na_rm=TRUE),
                            "2 year degree"=  ~ n_perc(edu == "2 year degree", na_rm=TRUE), 
                            "4 year degree"=  ~ n_perc(edu == "4 year degree", na_rm=TRUE), 
                            "Post-grad"=  ~ n_perc(edu == "Post-grad", na_rm=TRUE)))

bovitz_demos_table <- summary_table(bovitz_demo, bovitz_demos_summary) 
bovitz_demos_table

## lucid 

lucid_demos_df <- read.dta13("Figure A6/lucid_formatted.dta")  


lucid_demos_df <- lucid_demos_df %>% mutate(party_id = factor(Q2, levels=c("Democrat", "Republican", "Independent", "Other", "Don't know")))

lucid_demos_df <- lucid_demos_df %>% mutate(gender = factor(Q52, levels=c("Male" , "Female" , "Other")))

lucid_demos_df <- lucid_demos_df %>% mutate(edu =  case_when( 
  Q53 == "Professional degree" ~  "Post-grad", 
  Q53 == "Doctorate" ~  "Post-grad", 
  TRUE ~ Q53), edu = factor(edu, levels=c(
    "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))


lucid_demos_df <- lucid_demos_df %>% mutate(age_range = case_when(
  Q49 >= 18 & Q49 <= 29 ~ "18--29",
  Q49 >= 30 & Q49 <= 39 ~ "30--39",
  Q49 >= 40 & Q49 <= 49 ~ "40--49",
  Q49 >= 50 & Q49 <= 59 ~ "50--59",
  Q49 >= 60 & Q49 <= 69 ~ "60--69",
  Q49 >= 70 ~  "70+", 
  TRUE ~ NA_character_), age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))


lucid_demos_df <- lucid_demos_df %>% mutate(income =  case_when( 
  hhi == -3105 ~ NA_character_,
  hhi >= 1 & hhi <= 4  ~ "Less than $30,000", 
  hhi >= 5 & hhi <= 10 ~ "Between $30,000 and $59,000",
  hhi >= 11 & hhi <= 19 ~ "Between $60,000 and $125,000",
  hhi >= 20 ~ "More than $125,000",
  TRUE ~ NA_character_), income = factor(income, levels=c("Less than $30,000" , "Between $30,000 and $59,000" , "Between $60,000 and $125,000" , "More than $125,000")))


#compelte cases
lucid_demo <- lucid_demos_df %>% select(edu, income, gender, age_range, party_id, task_num, profile_chosen) %>% filter(task_num ==1 & profile_chosen == 1) %>% na.omit()


library(qwraps2)
options(qwraps2_markup="markdown")
lucid_demos_summary<- list("Gender" =  
                             list("Male"=  ~ n_perc(gender == "Male"),
                                  "Female" = ~ n_perc(gender == "Female"),
                                  "Other" = ~ n_perc(gender == "Other")), 
                           "Income" = 
                             #note the mismatch on 120 vs 125. see footnote in appendix
                             list("Less than $30,000"=  ~ n_perc(income == "Less than $30,000", na_rm=TRUE),
                                  "Between $30,000 and $59,000" = ~ n_perc(income == "Between $30,000 and $59,000", na_rm=TRUE),
                                  "Between $60,000 and $120,000" = ~ n_perc(income == "Between $60,000 and $125,000", na_rm=TRUE),
                                  "More than $120,000" = ~ n_perc(income == "More than $125,000", na_rm=TRUE)),
                           "Age" = 
                             list("18--29" =  ~ n_perc(age_range == "18--29", na_rm=TRUE), 
                                  "30--39" =  ~ n_perc(age_range == "30--39", na_rm=TRUE), 
                                  "40--49" =  ~ n_perc(age_range == "40--49", na_rm=TRUE), 
                                  "50--59" =  ~ n_perc(age_range == "50--59", na_rm=TRUE), 
                                  "60--69" =  ~ n_perc(age_range == "60--69", na_rm=TRUE), 
                                  "70+"    =  ~ n_perc(age_range == "70+", na_rm=TRUE)), 
                           "Party ID" = 
                             list("Democratic"=  ~ n_perc(party_id == "Democrat", na_rm=TRUE),
                                  "Republican"=  ~ n_perc(party_id == "Republican", na_rm=TRUE),
                                  "Independent" =  ~ n_perc(party_id == "Independent", na_rm=TRUE),
                                  "Other"=  ~ n_perc(party_id == "Other", na_rm=TRUE)),
                           "Education" =
                             list("Less than high school"=  ~ n_perc(edu == "Less than high school", na_rm=TRUE),
                                  "High school graduate"=  ~ n_perc(edu == "High school graduate", na_rm=TRUE),
                                  "Some college" =  ~ n_perc(edu == "Some college", na_rm=TRUE),
                                  "2 year degree"=  ~ n_perc(edu == "2 year degree", na_rm=TRUE), 
                                  "4 year degree"=  ~ n_perc(edu == "4 year degree", na_rm=TRUE), 
                                  "Post-grad"=  ~ n_perc(edu == "Post-grad", na_rm=TRUE)))

lucid_demos_table <- summary_table(lucid_demo, lucid_demos_summary)
lucid_demos_table


##cces -- can't use summary table becuase it doesn't do weights. 
cces_demos_df <- read.dta13("CCES/cces.dta")
#pid
cces_demos_df <- cces_demos_df %>% mutate(pid_demos = case_when( 
  pid3_leaner == 1  ~ "Democrat", 
  pid3_leaner == 2 ~ "Republican",
  pid3_leaner == 3 ~ "Independent",
  pid3_leaner == 8 ~ "Other",
  TRUE ~ NA_character_)) %>%  mutate(pid_demos = factor(pid_demos, levels=c("Democrat", "Republican", "Independent", "Other")))

#income 
cces_demos_df <- cces_demos_df %>% 
  mutate(income_range = factor(income_range, levels=c("Less than $30,000", "Between $30,000 and $59,999", "Between $60,000 and $120,000", "$120,000 or more", "Prefer not to say")))

#age
cces_demos_df <- cces_demos_df %>% 
  mutate(age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))

#gender
cces_demos_df <- cces_demos_df %>% mutate(gender_demos =  case_when( 
  gender == 1  ~ "Male", 
  gender == 2  ~ "Female",
  TRUE ~ NA_character_), gender_demos = factor(gender_demos, levels=c("Male" , "Female", "Other")))

#edu
cces_demos_df <- cces_demos_df %>% mutate(edu =  case_when( 
  educ == 1 ~  "Less than high school", 
  educ == 2 ~  "High school graduate", 
  educ == 3 ~  "Some college", 
  educ == 4 ~  "2 year degree", 
  educ == 5 ~  "4 year degree", 
  educ == 6 ~  "Post-grad", 
  TRUE ~ NA_character_), edu = factor(edu, levels=c(
    "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))

#sample weights
cces_design <- svydesign(ids=~0, weights=~weight, data=cces_demos_df)

#gender
cces_gender <- svytable(~gender_demos, cces_design) %>% 
        as_tibble() %>% mutate(variable = "Gender") %>%
        rename(level = gender_demos)
#partyid
cces_pid <- svytable(~pid_demos, cces_design) %>% 
  as_tibble() %>% mutate(variable = "Party ID") %>%
  rename(level = pid_demos) 

#income 
cces_inc <- svytable(~income_range, cces_design) %>% 
  as_tibble() %>% 
  mutate(variable = "Income")  %>%
  rename(level = income_range) %>% 
  filter(level != "Prefer not to say")
  

#education 
cces_edu <- svytable(~edu, cces_design)  %>% 
  as_tibble() %>% mutate(variable = "Education") %>%
  rename(level = edu)

#age
cces_age <- svytable(~age_range, cces_design) %>% 
  as_tibble() %>% mutate(variable = "Age") %>%
  rename(level = age_range)


cces_bind <- rbind(cces_gender,cces_inc, cces_age, cces_pid, cces_edu) %>%
  group_by(variable) %>%
  mutate(total = sum(n), prop = round((n/total*100), 2)) %>%
  mutate(prop = paste0(prop,"%"))
cces_bind

#grab the summary table object from lucid and replace the stats with our manually created ones. 
cces_demos_table <- lucid_demos_table
cces_demos_table[1:23] <- cces_bind$prop
dimnames(cces_demos_table)[[2]] <- "2019 Cooperative Congressional Election Study (N=18,000, weighted) Comparison"
cces_demos_table


demographics <- cbind(bovitz_demos_table, lucid_demos_table, cces_demos_table)


#Output table A1
TableA1File<-file("TableA1.txt")
writeLines(qable(demographics), TableA1File)
close(TableA1File)



### Table A2
###party ID
bovitz_demos_df <- bovitz_demos_df %>% 
  mutate(pid3_lean_factor = case_when(
  pid3_lean == 1 ~ "Democrat", 
  pid3_lean == 2 ~ "Independent",
  pid3_lean == 3 ~ "Republican"))

#wave 1
w1pid <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>% 
  tabyl(pid3_lean_factor) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% as_tibble() %>% mutate(wave = "Wave 1") %>% 
  rename(demo = pid3_lean_factor)


##wave 2
w2pid <- bovitz_demos_df %>% 
  filter(task_num == 1 & profile_chosen == 1) %>% 
  tabyl(pid3_lean_factor) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% as_tibble() %>% mutate(wave = "Wave 2") %>% 
  rename(demo = pid3_lean_factor)
   

## college or more 
bovitz_demos_df <-  bovitz_demos_df %>% mutate(college_more = case_when(edu == "4 year degree" | edu == "Post-grad" ~ "College or more", 
                                                   !is.na(edu) ~ "Less than college"))
w1edu <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
  tabyl(college_more) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% as_tibble() %>% 
  mutate(wave = "Wave 1") %>% 
  rename(demo = college_more)

w2edu <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1)) %>%
  tabyl(college_more) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% as_tibble() %>% 
  mutate(wave = "Wave 2")  %>% 
  rename(demo = college_more)

#white 
bovitz_demos_df <-  bovitz_demos_df %>% mutate(white = case_when(race == 1 ~ "White", 
                                                                        !is.na(race) ~ "Non-white"))

w1race <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
  tabyl(white) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% 
  as_tibble() %>% 
  mutate(wave ="Wave 1")  %>% 
  rename(demo = white)

w2race <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) ) %>%
  tabyl(white) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% 
  as_tibble() %>% 
  mutate(wave ="Wave 2") %>%
  rename(demo = white)

#gender 
w1gender <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
  tabyl(gender_demo) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% 
  as_tibble() %>% 
  mutate(wave ="Wave 1") %>%
  rename(demo = gender_demo)

w2gender <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) ) %>%
  tabyl(gender_demo) %>%
  adorn_totals() %>%
  adorn_pct_formatting() %>% 
  as_tibble() %>% 
  mutate(wave ="Wave 2")  %>%
  rename(demo = gender_demo)


##ethno
w1ethno <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
  summarize(eth_mean = mean(ethnocentrism), eth_sd = sd(ethnocentrism), n = n())  %>%
  mutate(wave = "Wave 2", demo = "Ethnocentrism", percent = paste0("Mean=", round(eth_mean, 3), ", SD=", round(eth_sd, 3))) %>%
  select(demo, percent, wave, n)

w2ethno <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1)) %>%
  summarize(eth_mean = mean(ethnocentrism), eth_sd = sd(ethnocentrism), n = n()) %>%
  mutate(wave = "Wave 1", demo = "Ethnocentrism", percent = paste0("Mean=", round(eth_mean, 3), ", SD=", round(eth_sd, 3))) %>%
  select(demo, percent, wave, n) 

 

nw2<- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) ) %>%
  nrow() 

nw1 <- bovitz_demos_df %>% 
  filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
  nrow()


panel_attrition <- rbind(w1pid, w2pid, w1edu, w2edu, w1gender, w2gender, w1race, w2race, w1ethno, w2ethno ) %>% 
  filter(demo == "College or more" |
           demo == "Female" |
           demo == "White" |
           demo == "Democrat"|  demo == "Republican"|  demo == "Independent"| 
           demo == "Ethnocentrism") %>% select(demo, percent, wave) %>% 
   mutate(wave = case_when(wave == "Wave 1" ~ paste0(wave, " n=", nw1), 
                           wave == "Wave 2"  ~ paste0(wave, " n=", nw2))) %>% 
   pivot_wider(names_from = wave, values_from = percent)

panel_attrition

#Output table A2
TableA2File<-file("TableA2.txt")
writeLines(qable(panel_attrition), TableA2File)
close(TableA2File)





results_data <- read.dta13("../final_w1w2.dta")
  

#format
results_data<- results_data %>% 
  filter(is.na(wave1_only)) %>%
  mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
  mutate(mark_up_cat = case_when(
    mark_up == "1" ~ "100", 
    mark_up == "0.25" ~ "25", 
    mark_up == "0.5" ~ "50",
    mark_up == "0" ~ "0")) %>% 
  mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))


#bin our ethnocentrism measure 
results_data <- results_data %>%
  mutate(ethno_factor = case_when(
    ethnocentrism <= .375 ~ "Low ethnocentrism", 
    ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
    ethnocentrism >= .625  ~ "High ethnocentrism", 
  )) %>% mutate(ethno_factor = as.factor(ethno_factor))


#### Figure A2
#### MM by product

c_design <- profile_chosen ~ mark_up_cat + rating_cat + country 
mm_by_product <- cj(results_data, c_design, id = ~id, estimate = "mm", by = ~product_cat)
mm_by_product <- mm_by_product %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ "Mark up over base price (%)", 
  feature == "rating_cat"  ~ "Rating by past customers",
  feature == "country"     ~ "Country of origin")) 

marginal_means_plot_by_product <- ggplot(mm_by_product, aes(y=estimate, x=level, color=product_cat,  group=product_cat)) +
  facet_grid(feature_label~., scales="free") + 
  coord_flip() + plot_theme + ylab("Marginal mean") + xlab("") + 
  geom_point(position=position_dodge(width=.75)) + 
  geom_errorbar(aes(ymin= lower, ymax= upper), width=0, position=position_dodge(width=.75)) + 
  labs(col="Product") + theme(legend.position="right")
marginal_means_plot_by_product
ggsave("FigureA2.png",marginal_means_plot_by_product, dpi=300, width=8, height=9)

#### Figure A3
#### MM by task number 

results_data <- results_data %>% mutate(task_num = as.factor(task_num))
mm_by <- cj(results_data, c_design, id = ~id, estimate = "mm", by = ~ task_num)
mm_by <- mm_by %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ "Mark up over base price (%)", 
  feature == "rating_cat"  ~ "Rating by past customers",
  feature == "country"     ~ "Country of origin")) %>%
  mutate(task_num = factor(task_num, levels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")))
marginal_means_plot_by_task <- ggplot(mm_by, aes(y=estimate, x=level, color=task_num,  group=task_num)) +
  facet_grid(feature_label~., scales="free") + 
  coord_flip() + plot_theme + ylab("Marginal mean") + xlab("") + 
  geom_point(position=position_dodge(width=.75)) + 
  #geom_text(aes(label=round(estimate, digits = 2)),size = 2, nudge_x=.25) +
  geom_errorbar(aes(ymin= lower, ymax= upper), size=1, width=0, position=position_dodge(width=.75)) + 
  labs(col="Task number") + theme(legend.position="right")
marginal_means_plot_by_task
ggsave("FigureA3.png",marginal_means_plot_by_task, dpi=300, width=8, height=9)


#### Figure A4 
#### AMCE by product category 



##### what is main effect of treatments? #####
feature_names <- c("Mark up over base price (%)" , "Rating by past customers", "Country of origin" )

main_effects <- cj(results_data, c_design, id = ~task_num)
main_effects <- main_effects %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ feature_names[1], 
  feature == "rating_cat"  ~ feature_names[2],
  feature == "country"     ~ feature_names[3])) %>%
  mutate(feature_label = factor(feature_label, levels=rev(feature_names)))

#### appliances  ####
appliances <- c("Washing Machine", "Toaster", "Microwave")
main_effects_appliances <- cj(filter(results_data, product %in% appliances), c_design, id = ~task_num)
main_effects_appliances <- main_effects_appliances %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ feature_names[1], 
  feature == "rating_cat"  ~ feature_names[2],
  feature == "country"     ~ feature_names[3])) %>%
  mutate(feature_label = factor(feature_label, levels=rev(feature_names)))

#### food ####
food <- c("1 pound of butter", "1 pound of cheese", "1 pound of coffee")
main_effects_food <- cj(filter(results_data, product %in% food), c_design, id = ~task_num)
main_effects_food <- main_effects_food %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ feature_names[1], 
  feature == "rating_cat"  ~ feature_names[2],
  feature == "country"     ~ feature_names[3])) %>%
  mutate(feature_label = factor(feature_label, levels=rev(feature_names)))


#### Household goods #### 
household <- c("Roll of black duct tape", "4 rolls of paper towels", "96 oz laundry detergent", "12-inch non-stick skillet", "Small bottle of super glue", "Cell phone screen protector")

main_effects_household<- cj(filter(results_data, product %in% household), c_design, id = ~task_num)
main_effects_household <- main_effects_household %>% mutate(feature_label = case_when(
  feature == "mark_up_cat" ~ feature_names[1], 
  feature == "rating_cat"  ~ feature_names[2],
  feature == "country"     ~ feature_names[3])) %>%
  mutate(feature_label = factor(feature_label, levels=rev(feature_names)))

## label 
main_effects_household$products <- "Household goods"
main_effects_food$products <- "Food"
main_effects_appliances$products <- "Appliances"
main_effects$products <- "All products"

product_results <- bind_rows(main_effects_household, main_effects_food, main_effects_appliances, main_effects)

pd_combined<-position_dodge(.75)
products_plot <- ggplot(product_results, aes(y=estimate, x=level, group=products)) + geom_hline(yintercept = 0, color="#A0A0A0" ) +
  facet_grid(.~feature_label, scales="free") + 
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
  plot_theme + ylab("AMCE relative to base attribute level") + xlab("") + 
  geom_point(aes(fill=products, shape=products),color="black",size=4,position=pd_combined) +
  scale_shape_manual(values=c(21,22, 23, 24))

products_plot
ggsave("FigureA4.png",products_plot, dpi=300, width=12, height=8)





######
# Figure A5
######

# Center and Right Panel - AMCE and MTWP across whole sample - takes a long time to run.
# Note that aggregated markup estimates are presented in Figure A5 in the appendix. 

data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, task_num, product_cat, ethno_factor) %>%
  drop_na() %>%
  mutate(country = relevel(country, ref="United States")) %>%
  mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)


for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset. 
  # we want a sample that matches our sample size. will have repeated observations. 
  # create the new data set
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  # this is *much* more efficient in data.table
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country, data=bootSample, respondent.id = "id")
  
  #store estimates 
  country <-  as.tibble(t(results_vector[[run]]$estimates$country), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  #store the estimates
  estimates_results[[run]] <- bind_rows(country, markup) %>% mutate(run = run)
  #clear memory 
  rm(country, markup)
}

# combine results from the bootstrap 
full_sample_results <- bind_rows(estimates_results) 
# clean out 
rm(estimates_results, results_vector)

# Disaggregated by markup 
# get the markup coeffs to joint back into the dataset
full_sample_markups <- full_sample_results %>% filter(grepl('markup', est)) %>% 
  select(est, AMCE, run) %>%
  rename(markup_est = AMCE, markup = est)

# join markup coefficients and calculate MWTP
full_sample_results_summary_disag <-  full_sample_results %>% 
  filter(grepl('country', est)) %>%  
  left_join(full_sample_markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = AMCE/(markup_est/as.numeric(markup_num))) %>%
  group_by(est, markup_num) %>%
  summarise(mean_amce = mean(AMCE), 
            lower_amce =quantile(AMCE, probs=c(.025)), 
            upper_amce = quantile(AMCE, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975))) %>%
  mutate(ethno_factor = "Full sample") %>%
  rename(main = est)

# grab MTWP
mwtp_full_sample_results_disag <- full_sample_results_summary_disag %>% 
  select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")

###############################
# Estimates by level of ethnocentrism 
###############################

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)

for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset. 
  # we want a sample that matches our sample size. will have repeated observations. 
  # create the new data set
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  #sample
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
  
  #store estimates 
  country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
  country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
  ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
  rm(country_ethno,country,ethno,markup)
}

#combine results 
results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )
#rm(estimates_results, results_vector)

# pull out the markup estimates
markups <- results %>% filter(grepl('markup', main)) %>% 
  select(main, AMCE, run) %>%
  rename(markup_est = AMCE, markup = main)

# pull out estimates for high ethnocentrism  
high_ethno <- results %>% filter(grepl('country', main) & is.na(conditional)) %>% 
  mutate(ethno_factor = "High ethnocentrism") %>%
  select(`Conditional Estimate`, ethno_factor, run, main) %>%
  rename(estimate = `Conditional Estimate` )

# pull out estimates for low ethnocentrism 
low_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
  mutate(ethno_factor = "Low ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()
  



## Binned ethnocentrism results, disaggregated by markup
full_results_disag <- bind_rows(high_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor, markup_num) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))


mwtp_results_disag <- full_results_disag %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")

# Combine results that are aggregated by markup 
binned_results_disag <- bind_rows(mwtp_full_sample_results_disag, mwtp_results_disag) %>%
  rename(estimate = mean, BY = ethno_factor, statistic = analysis, level = main)


# prep results for plot 
for_plot_disag <- binned_results_disag %>%
  filter(statistic == "MWTP") %>%
  mutate(level = str_remove(level, "^country")) %>%
  mutate(level= case_when(level == "AcountryoutsidetheUnitedStates" | level == "A country outside the United States" ~ "Outside the\nUnited States", 
                          TRUE ~ level)) %>% 
  mutate(BY = case_when(
    BY == "Full sample" ~ "Sample-wide average", 
    TRUE ~ BY )) %>%
  mutate(BY = factor(BY, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
  mutate(level = factor(level, levels = c("China",  "Outside the\nUnited States", "Germany", "United States"))) %>%
  mutate(facet_label = paste0("Markup: ", markup_num, "% over baseline")) %>%
  mutate(facet_label = factor(facet_label, levels = c("Markup: 25% over baseline", "Markup: 50% over baseline", "Markup: 100% over baseline")))



mtwp_disag_plot <- ggplot(data=for_plot_disag, aes(y=estimate, x=level, group=BY)) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
  geom_point(aes(fill=BY, shape=BY),color="black",size=4,position=pd_combined) + 
  xlab("Country") +
  ylab("Estimate") +
  scale_shape_manual(values=c(22,21, 23, 24))+
  facet_wrap(facet_label~.) + plot_theme
mtwp_disag_plot


ggsave("FigureA5.png", mtwp_disag_plot, dpi=600, width=14, height=7)



##############
#### Figure A7
##############

#### Conditional Logit #### 
library(survival)

results_vector = list()
est_list = list()
resp_ids <- unique(results_data$id)
results_data <- results_data %>% group_by(task_num,id) %>% 
  mutate(strata_id = group_indices()) %>% mutate(country = relevel(country, ref="United States"))

### binned estimates
set.seed(1234)
for (run in 1:runs) {

  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  bootSample <- as.data.table(results_data)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  results_vector[[run]] <- clogit(profile_chosen~mark_up+country*ethno_factor+rating_cat+strata(strata_id),  data=bootSample, method="approximate")
  est_list[[run]] <-as.tibble(results_vector[[run]]$coefficients, rownames="coefficient")
  est_list[[run]]$run <- run
}

clogit_bs_results <- bind_rows(est_list)

markups <- clogit_bs_results %>% filter(grepl('mark_up', coefficient)) %>% rename(markup_est= value) %>% select(run, markup_est)

high_ethno <- clogit_bs_results %>% filter(grepl('country', coefficient)) %>% 
  filter(!(grepl(':ethno_factorMedium ethnocentrism', coefficient))) %>%
  filter(!(grepl(':ethno_factorLow ethnocentrism', coefficient))) %>%
  mutate(ethno_factor = "High ethnocentrism") 

low_ethno <- clogit_bs_results %>% 
  filter(grepl('country', coefficient)) %>% 
  filter(!(grepl(':ethno_factorMedium ethnocentrism', coefficient))) %>%
  separate(coefficient, sep=":", c("coefficient", "conditional")) %>%
  group_by(run, coefficient) %>%
  summarize( value =  sum (value)) %>% 
  mutate(ethno_factor = "Low ethnocentrism") 


full_results_clogit <- bind_rows(high_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(mwtp = value/markup_est) %>%
  group_by(coefficient, ethno_factor) %>%
  summarise(mean_mwtp = mean(mwtp)*100,
            lower_mwtp =quantile(mwtp, probs=c(.025))*100, 
            upper_mwtp = quantile(mwtp, probs=c(.975))*100)


# Sample-wide estimates  
set.seed(1234)
est_list = list()
for (run in 1:runs) {
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  bootSample <- as.data.table(results_data)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  results_vector[[run]] <- clogit(profile_chosen~mark_up+country+rating_cat+strata(strata_id),  data=bootSample, method="approximate")
  est_list[[run]] <-as.tibble(results_vector[[run]]$coefficients, rownames="coefficient")
  est_list[[run]]$run <- run
}

clogit_bs_results_unconditional <- bind_rows(est_list)
markups <- clogit_bs_results_unconditional %>% filter(grepl('mark_up', coefficient)) %>% rename(markup_est= value) %>% select(run, markup_est)

unconditional_clogit <-  clogit_bs_results_unconditional %>%  filter(grepl('country', coefficient)) %>%
  left_join(markups, by=c("run")) %>%
  mutate(mwtp = value/markup_est) %>%
  group_by(coefficient) %>%
  summarise(mean_mwtp = mean(mwtp)*100,
            lower_mwtp =quantile(mwtp, probs=c(.025))*100, 
            upper_mwtp = quantile(mwtp, probs=c(.975))*100) %>%
  mutate(ethno_factor = "Sample-wide average") 


clogit_for_plot <- bind_rows(unconditional_clogit, full_results_clogit  ) %>% 
  mutate(coefficient = str_replace(coefficient, "country", "")) %>%
  mutate(ethno_factor = factor(ethno_factor, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
  mutate(coefficient = case_when(coefficient == "A country outside the United States" ~ "Outside the\nUnited States", TRUE ~ coefficient)) %>%
  mutate(coefficient = factor(coefficient, levels = c("China",  "Outside the\nUnited States", "Germany", "United States"))) 

### put together plot
pd_combined<-position_dodge(.75)
clogit_mtwp_averages_plot <- ggplot(data=clogit_for_plot, aes(y=mean_mwtp, x=coefficient, group=ethno_factor)) +
  geom_errorbar(aes(ymin=lower_mwtp, ymax=upper_mwtp), width=0, position=pd_combined)+
  geom_point(aes(fill=ethno_factor, shape=ethno_factor),color="black",size=4,position=pd_combined) + 
  xlab("Country") +
  ylab("Estimate") +
  scale_shape_manual(values=c(22,21, 23, 24))+ plot_theme
clogit_mtwp_averages_plot


ggsave("FigureA7.png", clogit_mtwp_averages_plot, dpi = 300, width=8.5, height=6, units="in")





#### Table A5
## read data fresh

##############
##With College Education#### 
##########

##Read in data 
results_data <- read.dta13("../final_w1w2.dta")  %>% filter(is.na(wave1_only))
with(results_data, table(educ))
results_data <- results_data %>% 
  filter(educ >=4)
nrow(results_data)

#format
results_data<- results_data %>% 
  mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
  mutate(mark_up_cat = case_when(
    mark_up == "1" ~ "100", 
    mark_up == "0.25" ~ "25", 
    mark_up == "0.5" ~ "50",
    mark_up == "0" ~ "0")) %>% 
  mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))


#bin our ethnocentrism measure 
results_data <- results_data %>%
  mutate(ethno_factor = case_when(
    ethnocentrism <= .375 ~ "Low ethnocentrism", 
    ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
    ethnocentrism >= .625  ~ "High ethnocentrism", 
  )) %>% mutate(ethno_factor = as.factor(ethno_factor))


data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, product_cat, ethno_factor) %>%
  drop_na() %>%
  mutate(country = relevel(country, ref="United States")) %>%
  mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))


###############################
# Estimates by level of ethnocentrism 
###############################

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)

for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset. 
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  #sample
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
  
  #store estimates 
  country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
  country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
  ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
  rm(country_ethno,country,ethno,markup)
}

#combine results 
results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )

# pull out the markup estimates
markups <- results %>% filter(grepl('markup', main)) %>% 
  select(main, AMCE, run) %>%
  rename(markup_est = AMCE, markup = main)

# pull out estimates for high ethnocentrism  
high_ethno <- results %>% 
  filter(grepl('country', main) & is.na(conditional)) %>% 
  mutate(ethno_factor = "High ethnocentrism") %>%
  select(`Conditional Estimate`, ethno_factor, run, main) %>%
  rename(estimate = `Conditional Estimate` )

# pull out estimates for medium ethnocentrism  
medium_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorMediumethnocentrism") ) %>%
  mutate(ethno_factor = "Medium ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()

# pull out estimates for low ethnocentrism 
low_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
  mutate(ethno_factor = "Low ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()

## Binned ethnocentrism results, averaging over the markups
full_results <- bind_rows(high_ethno, medium_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))

full_results_college<-full_results

mwtp_results_college <- full_results_college %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")
mwtp_results_college

##Without College Education####


##Read in data (assuming WD set to same directory as data)
results_data <- read.dta13("../final_w1w2.dta")  %>% filter(is.na(wave1_only))
with(results_data, table(educ)) 
results_data <- results_data %>% 
  filter(educ <4)
nrow(results_data)

#format
results_data<- results_data %>% 
  mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
  mutate(mark_up_cat = case_when(
    mark_up == "1" ~ "100", 
    mark_up == "0.25" ~ "25", 
    mark_up == "0.5" ~ "50",
    mark_up == "0" ~ "0")) %>% 
  mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))


#bin our ethnocentrism measure 
results_data <- results_data %>%
  mutate(ethno_factor = case_when(
    ethnocentrism <= .375 ~ "Low ethnocentrism", 
    ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
    ethnocentrism >= .625  ~ "High ethnocentrism", 
  )) %>% mutate(ethno_factor = as.factor(ethno_factor))

data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, product_cat, ethno_factor) %>%
  drop_na() %>%
  mutate(country = relevel(country, ref="United States")) %>%
  mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))

###############################
# Estimates by level of ethnocentrism 
###############################

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)

for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset. 
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  #sample
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
  
  #store estimates 
  country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
  country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
  ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
  rm(country_ethno,country,ethno,markup)
}

#combine results 
results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )

# pull out the markup estimates
markups <- results %>% filter(grepl('markup', main)) %>% 
  select(main, AMCE, run) %>%
  rename(markup_est = AMCE, markup = main)

# pull out estimates for high ethnocentrism  
high_ethno <- results %>% 
  filter(grepl('country', main) & is.na(conditional)) %>% 
  mutate(ethno_factor = "High ethnocentrism") %>%
  select(`Conditional Estimate`, ethno_factor, run, main) %>%
  rename(estimate = `Conditional Estimate` )

# pull out estimates for medium ethnocentrism  
medium_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorMediumethnocentrism") ) %>%
  mutate(ethno_factor = "Medium ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()

# pull out estimates for low ethnocentrism 
low_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
  mutate(ethno_factor = "Low ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()

## Binned ethnocentrism results, averaging over the markups
full_results_nocollege <- bind_rows(high_ethno, medium_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))

full_results_nocollege<-full_results_nocollege


mwtp_results_nocollege <- full_results_nocollege %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")


mwtp_results_college <- mwtp_results_college %>% mutate(aware = "High awareness")
mwtp_results_nocollege <- mwtp_results_nocollege %>% mutate(aware = "Low awareness")

mwtip_by_aware <- rbind(mwtp_results_college, mwtp_results_nocollege) %>% 
  mutate(main = str_remove(main, "country")) %>%
  mutate(main = case_when(main== "AcountryoutsidetheUnitedStates" ~ "Outside U.S.", 
                           TRUE ~ main)) %>%
  mutate(mwtp_w_ci = paste0 (round(mean,2), " (", round(lower,2), ", ", round(upper,2), ")" )) %>%
  select(main, ethno_factor, aware,mwtp_w_ci ) %>%
  mutate(main = factor(main, levels = c("China", "Germany",  "Outside U.S."))) %>%
  mutate(ethno_factor = factor(ethno_factor, levels = c("Low ethnocentrism", "Medium ethnocentrism", "High ethnocentrism"))) %>%
  mutate(aware = factor(aware, levels = c("Low awareness", "High awareness"))) %>% arrange(aware) %>%
  pivot_wider(names_from=aware, values_from=mwtp_w_ci ) %>% arrange(main, ethno_factor) %>%
  rename(Country = main, Ethnocentrism = ethno_factor) %>%
  qable()

#Output table A5
TableA5File<-file("TableA5.txt")
writeLines(mwtip_by_aware, TableA5File)
close(TableA5File)


##TableA6 

ft_data <- read.dta13("../final_w1w2.dta") %>% filter(is.na(wave1_only)) %>%
  select(id, ft_mexico, ft_china, ft_UK, ft_germ, ft_japan, ft_PR, ft_us) %>%
  group_by(id) %>%
  pivot_longer(cols =starts_with("ft_"), names_to="country") %>%
  na.omit() %>%
  mutate(country = case_when(country == "ft_mexico" ~ "Mexico",
                             country == "ft_UK" ~ "United Kingdom",
                             country == "ft_us" ~ "United States",
                             country == "ft_germ" ~ "Germany",
                             country == "ft_japan" ~ "Japan",
                             country == "ft_china" ~ "China",
                             country == "ft_PR" ~ "Puerto Rico")) %>%
  group_by(country) %>%
  summarize(ft_mean = mean(value), ft_sd = sd(value)) %>%
  mutate(`Mean (SD)` = paste0(round(ft_mean, 2), " (", round(ft_sd, 2), ")")) %>%
  rename(Country = country) %>%
  select(Country, `Mean (SD)`) %>% qable()


#Output table A6
TableA6File<-file("TableA6.txt")
writeLines(ft_data, TableA6File)
close(TableA6File)





